# This file contains the S-plus code that was used to implement the estimators and functionals in "Local polynomial fitting in failure rate estimation", IEEE Transactions on reliability, (57): 41-52, (2008)
# As a fully working example the code herein can reproduce Figure 3 of the paper (the relevant data are also given below). 
# The same functions (with different input data) can be used in reproducing other figures of the same paper 
# More general use of the estimates is also feasible by supplying the corresponding data set.  Use of the functions is self - explanatory and is further facilitated by the comments at he preamble of each function.

#N.B. Compatibility with R is not guaranteed as the functions were not written for use with R - however, with minimal modifications, their use in the R environment should be feasible.

suicdata<-c( 		0.01,0.01, 0.01, 0.01, 0.01, 0.01, 0.02,0.02,
                             0.02,0.02,  0.03,0.04,0.06,0.08, 0.1,0.1, 0.12,0.12,0.12, 0.13,0.14, 0.15,0.15,0.15,
				0.16, 0.16,0.17, 0.18,0.18, 0.19,0.20, 0.21,
				0.22, 0.23,0.25, 0.26, 0.28, 0.28, 0.30, 0.32,
				0.34, 0.36, 0.38, 0.39, 0.41, 0.41, 0.42, 0.43, 
				0.44, 0.44, 0.45, 0.45, 0.5, 0.53, 0.56, 0.58,
				0.58, 0.61, 0.62, 0.62, 0.62, 0.64, 0.66, 0.70,
				0.70, 0.70, 0.72, 0.77, 0.78, 0.78, 0.80, 0.82,
				0.83, 0.85, 0.86, 0.96, 0.97, 0.98, 0.99, 1.05,
				1.06, 1.07, 1.18, 1.35, 1.36, 1.42, 1.55, 1.59,
				1.65, 1.73, 1.77, 1.79, 1.80, 1.91, 2.09, 2.14,
				2.15, 2.15, 2.31, 2.33, 2.36, 2.36, 2.43, 2.45,
				2.50, 2.51, 2.58, 2.64, 2.68, 3.08, 3.94, 4.12,
				4.33, 4.42, 4.53, 4.88, 4.97, 5.11, 5.32, 5.55,
				6.63, 6.89, 7.62, 11.41, 11.76, 11.85, 12.36, 13.22)
	cat(" ", "\n", mean(suicdata), " ", length(suicdata), "\n")


"NmaxSelection"<-function(n, Nstar)
#implementation of equation 10 in Ruppert, Sheather and Wand, JASA (1995)
#n = sample size, Nstar = 5 or higher 
{
	max(min(trunc(n/20), Nstar),1)
}

"CreateSubsamples"<-function(xin, yin, N)
#This divides the (ordered) sample to N equally sized subsamples
#Also divides the ordered, according to the X_i's true values - Y_i - to  subsamples, so that they correspond to the X_i subsamples
#The result is returned in matrix form. 
{
	index<-order(xin)
	xin<- xin[index]
	yin<-yin[index]
	n<-    length(xin)
	t<-     n/N
	#cat("t = ", t)
	if(is.integer(t) != T){ t <- trunc(t)} #if N does not divide n then truncate t to the nearest integer to zero (some observations are lost)
	SubsamplesMatrixX<-matrix(xin, ncol=t, nrow=N) #put the subsamples (in blocks of N observations) in a matrix: column wise
	SubsamplesMatrixY<-matrix(yin, ncol=t, nrow=N) 
	list(SubsamplesMatrixX, SubsamplesMatrixY)
} 



"EstimateNhat"<-function(xin, yin)
# This, by minimizing Mallow's Cp by quartic regression (see Ruppert, Sheather and Wand, JASA (1995), page 1262)
# finds and returns N = number of blocks to be used in order to find an estimate of the regression function
{
	Nstar<-5
	n<-    length(xin)
	N<-NmaxSelection(n, Nstar)
	index<-order(xin)
	xin<- xin[index]
	yin<-yin[index]
	Nseq<-seq(1, N, by =1)
	RSS<-vector("numeric")
	for(i in 1:N)
	{
	   t<-     n/Nseq[i]
	if(is.integer(t) != T){ t <- trunc(t)} #if Nseq does not divide n then truncate t to the nearest integer to zero (some observations are lost)
	SubsamplesMatrixX<-matrix(xin, ncol=t, nrow=Nseq[i]) #put the subsamples (in blocks of N observations) in a matrix: column wise
	SubsamplesMatrixY<-matrix(yin, ncol=t, nrow=Nseq[i]) 
	#Coefficients.Matrix<-matrix(ncol=5, nrow=
	Residuals.Matrix<-matrix(0,nrow=Nseq[i], ncol=t) #NOMIZO - MAKE SURE
	Fit.Matrix<-matrix(0,nrow=Nseq[i], ncol=t)
		for(j in 1:Nseq[i])  
		 {						
			arg1<-lm(SubsamplesMatrixY[j,]~poly(SubsamplesMatrixX[j,], degree=4))
			Residuals.Matrix[j,]<-resid(arg1) 					
			Fit.Matrix[j,]<-fitted.values(arg1)
	 	}
	RSS[i]<-sum(Residuals.Matrix^2)
	}
	Cp<-RSS/(RSS[N]/(n-5*N)) - (n-10*N)
	index<-order(Cp)[1]	
	index
}

"QuarticRegressionEstimate"<-function(xin, yin)
{
	N<-EstimateNhat(xin, yin)	
	n <-length(xin)
	 t<-n/N
		if(is.integer(t) != T){ t <- trunc(t)} 	
	SubsamplesMatrixX<-matrix(xin, ncol=t, nrow=N) #put the subsamples (in blocks of N observations) in a matrix: column wise
	SubsamplesMatrixY<-matrix(yin, ncol=t, nrow=N) 
	Fit.Matrix<-matrix(0,nrow=N, ncol=t)
	Coefficients.Matrix<-matrix(0, ncol=5, nrow=N)
	for(j in 1:N)  
		 {						
			polyarg<-poly(SubsamplesMatrixX[j,], degree=4)
			arg1<-lm(SubsamplesMatrixY[j,]~polyarg)		
			Fit.Matrix[j,]<-fitted.values(arg1)
			Coefficients.Matrix[j,]<-poly.transform(polyarg, coef(arg1))
			
	 	}

out<-list(Coefficients.Matrix, SubsamplesMatrixX, SubsamplesMatrixY, Fit.Matrix) #return the coefficients and the fit - first argument: coefficients. second argument : fit. 
names(out)<-c("Coefficients.Matrix", "SubsamplesMatrixX", "SubsamplesMatrixY", "Fit.Matrix")
out
}

"SecondDerivativeQuarticEstimate"<-function(xin, yin)
{
	CoefAndFit<-QuarticRegressionEstimate(xin,yin)
	CoefficientsMatrix<-CoefAndFit$Coefficients.Matrix
	#cat(CoefficientsMatrix)
	SubsamplesMatrixX<-CoefAndFit$SubsamplesMatrixX
	SubsamplesMatrixY<-CoefAndFit$SubsamplesMatrixY
	SecondDerivIntercept<-2*CoefficientsMatrix[,3]		
	SecondDerivBeta1<-6*CoefficientsMatrix[,4]
	SecondDerivBeta2<-12*CoefficientsMatrix[,5]
	SubsamplesMatrixXsquared<-SubsamplesMatrixX^2
	FittedOutMatrix<- SecondDerivIntercept+ SecondDerivBeta1*SubsamplesMatrixX+ SecondDerivBeta2*SubsamplesMatrixXsquared
	
	#plot(as.vector(SubsamplesMatrixX), as.vector(FittedOutMatrix), type="l")
	#lines(xin,  .5*(-.5)*(-1.5)*.8^.5*(xin^(-2.5)))
	as.vector(FittedOutMatrix)
}



"LambdaHat"<-
#Watson & Leadbetter estimator (Hazard Analysis I, Biometrika (1964))
# -order statistics version. 
function(xin, xout, h, kfun){
    n<- length(xin)
    n1<-1:n
    nn<-length(xout)
    xin.use<-sort(xin)
    #cat(xin.use)
    hazard.rate<-vector("numeric",nn)
    for(i in 1:nn){
        hazard.rate[i]<-(1/h)*sum((kfun((xout[i]-xin.use)/h))/(n-n1+1))
            }
            hazard.rate
}


"Epanechnikov"<-
function(x){
	#Epanechikov kernel, square root of 5 is taken as 2.236068 (to save comp. time).
	ifelse(abs(x)< 2.236068, (3/4)*(1-((x^2)/5 ))/2.236068, ifelse(abs(x)>2.236068, 0,0))
}


"sn.0"<-function(xin, xout, h, kfun) { sapply(1:length(xout), function(i, xin, xout, h, kfun) sum(kfun((xin-xout[i])/h)), xin, xout, h, kfun) }

"sn.1"<-function(xin, xout, h, kfun) { sapply(1:length(xout), function(i, xin, xout, h, kfun)  sum(kfun((xin-xout[i])/h)*(xin-xout[i])), xin, xout, h, kfun) }

"sn.2"<-function(xin, xout, h, kfun) { sapply(1:length(xout), function(i, xin, xout, h, kfun) sum(kfun((xin-xout[i])/h)*((xin-xout[i])^2)), xin, xout, h, kfun) }

"sn.3"<-function(xin, xout, h, kfun){sapply(1:length(xout), function(i, xin, xout, h, kfun)  sum(kfun((xin-xout[i])/h)*((xin-xout[i])^3)), xin, xout, h, kfun) }

"sn.4"<-function(xin, xout, h, kfun) {sapply(1:length(xout), function(i, xin, xout, h, kfun)  sum(kfun((xin-xout[i])/h)*((xin-xout[i])^4)), xin, xout, h, kfun) }

"sn.5"<-function(xin, xout, h, kfun){sapply(1:length(xout), function(i, xin, xout, h, kfun)  sum(kfun((xin-xout[i])/h)*((xin-xout[i])^5)), xin, xout, h, kfun) }

"sn.6"<-function(xin, xout, h, kfun) {sapply(1:length(xout), function(i, xin, xout, h, kfun)  sum(kfun((xin-xout[i])/h)*((xin-xout[i])^6)), xin, xout, h, kfun) }



"tn.0"<-function(xin, xout, h, kfun, Y) { sapply(1:length(xout), function(i, xin, xout, h, kfun,Y)  sum(kfun((xin-xout[i])/h)*Y), xin, xout, h, kfun, Y) }

"tn.1"<-function(xin, xout, h, kfun, Y){sapply(1:length(xout), function(i, xin, xout, h, kfun,Y)  sum(kfun((xin-xout[i])/h)*((xin-xout[i])*Y)),xin, xout, h, kfun, Y) }

"tn.2"<-function(xin, xout, h, kfun, Y){sapply(1:length(xout), function(i, xin, xout, h, kfun,Y) sum(kfun((xin-xout[i])/h)*((xin - xout[i])^2 )*Y),xin, xout, h, kfun, Y) }

"tn.3"<-function(xin, xout, h, kfun, Y){sapply(1:length(xout), function(i, xin, xout, h, kfun,Y) sum(kfun((xin-xout[i])/h)*((xin - xout[i])^3 )*Y),xin, xout, h, kfun, Y) }

"IntEpanechnikov"<-function(x)
{
	ifelse(x< -2.236068, 0, ifelse(x> 2.236068, 1, .5- (2.236068*x*(x^2-15))/100))
}

"Ldoubledash"<-function(x)
{
	dnorm(x,0,1)*(x^2-1)
}

"PsiEst"<-function(x,g)
#estimate of the \psi_2 functional to be used for the altman method.
{
	n<-length(x)
	Muse<-matrix(0, n,n)
	for(i in 1:n)
	{
		for(j in 1:n)
		{
		 Muse[i,j]<-Ldoubledash((x[i]-x[j])/g)	
		}
	}
	sum(Muse)/(n^2)
}

"AltmanBand"<-function(x,g)
{
	(45/(-7*PsiEst(x,g)*length(x)))^(1/3)
}

"IntKde"<-function(xin, xout, h, kfun)
{
	n<- length(xin)
	arg1<-(sapply(xout, "-", xin))/h	
	arg2<-kfun(arg1)
	arg3<-colSums(arg2)
	arg3/n
}
	
"PilotWidth1"<- function(sample)
{
    #returns Silverman's default width 1.06 A n^(-1/5)
    n<-length(sample)
    StD<-stdev(sample)
    IqR<-diff(quantile(sample, c(.25, .75)))
    tmp<-min(StD, IqR/1.34)
    DefWidth<- 1.06*tmp*n^(-1/5)
    DefWidth
}

"PlugInOptBand"<-function(xin, xout, kfun, IntKfun, ci)
{
	N<-length(xin) 
	a<-PilotWidth1(xin) 
	b<-AltmanBand(xin, (length(xin))^(-.3))
	c<-IntKde(xin, max(xout), b, IntKfun)
	d<-IntKde(xin, max(xout), a, IntKfun)
	Muse<-1/(1-IntKde(xin, max(xout), b, IntKfun)) -1
	ThetaUse<-Theta22(xin, a, kfun, ci)
	top<-.6 #*Muse        #R(K)*M, M=1/(1-F(T))-1)
	bottom<- N * (2/15) * ThetaUse     #N*\mu_2^2(K)*\theta_2^2
	cat("a=", a, " ", "b= ", b, " ","c= ", c, " ", "d= ", d, " ", "M = ", Muse, " ", "theta22 = ", ThetaUse)
	(top/bottom)^0.2
}

"Theta22"<-function(xin, PilotBand, kfun, ci)
{
	theta=  (2*b2(xin, xin, PilotBand, Epanechnikov, ci))^2
	sum(theta)
}
	
"b2"<-function(xin, xout, h, kfun, ci)
{
	sn0<-sn.0(xin, xout, h, kfun)
	sn1<-sn.1(xin, xout, h, kfun)
	sn2<-sn.2(xin, xout, h, kfun)
	sn3<-sn.3(xin, xout, h, kfun)
	sn4<-sn.4(xin, xout, h, kfun)
	tn0<-tn.0(xin, xout, h, kfun, ci)
	tn1<-tn.1(xin, xout, h, kfun, ci)
	tn2<-tn.2(xin, xout, h, kfun, ci)
	top<- tn2*((sn1^2)-sn2*sn0)+tn1*(sn3*sn0-sn1*sn2)+tn0*((sn2^2)-sn1*sn3)
	bot<-(sn3^2) * sn0 -2*(sn1 *sn2 * sn3) - (sn0*sn2*sn4) + (sn1^2) *sn4 + sn2^2
	top/bot
}
"Biweight"<-function(x)
{
	#My Biweight Kernel :-)
	ifelse(abs(x) < 1, (15/16) * (  1 - x^2)^2, ifelse(abs(x) > 1, 0, 0))
}


######################################W J ########################
"a0BiwWJ"<-function(a)
{
	(15/16) * a -(5/8)*a^3+(3/16)*a^5+0.5
}

"a1BiwWJ"<-function(a)
{
	(15/32) *a^2- (15/32)* a^4+ (5/32) *a^6 - 5/32
}

"a2BiwWJ"<-function(a)
{
	(5/16)* a^3- (3/8)*a^5+(15/112)*a^7 +1/14
}
######################################W J ########################
"BoundaryBiweightWJ"<-   function(x,a)
{ 
	ifelse(abs(x)<1,    
	 ( a2BiwWJ(a) - a1BiwWJ(a)*x) * Biweight(x) / (a0BiwWJ(a)*a2BiwWJ(a)-(a1BiwWJ(a))^2) , ifelse(abs(x)>1, 0,0)) 
}

"a0Epan"<-function(a)
{
	-1/100*5^(1/2)*(a^3+5*5^(1/2))+3/20*5^(1/2)*(a+5^(1/2))
}

"a1Epan"<-function(a)
{
	-3/400*5^(1/2)*(a^4-25)+3/40*5^(1/2)*(a^2-5)
}

"a2Epan"<-function(a)
{
	 -3/500*5^(1/2)*(a^5+25*5^(1/2))+1/20*5^(1/2)*(a^3+5*5^(1/2))     
	  }
	
	
"BoundaryEpanechnikovWJ"<-   function(x,a)
{ 
	ifelse(abs(x)<sqrt(5),    
	 ( a2Epan(a) - a1Epan(a)*x) * Epanechnikov(x) / (a0Epan(a)*a2Epan(a)-(a1Epan(a))^2) ,
	 ifelse(abs(x)>sqrt(5), 0,0)) 
}


"LambdaHat"<-
#Watson & Leadbetter estimator (Hazard Analysis I, Biometrika (1964))  -order statistics version. 
function(xin, xout, h, kfun)
{
    n<- length(xin)
    n1<-1:n
    xin.use<-sort(xin)
    arg1<-(sapply(xout, "-", xin.use))/h	
    arg2<-kfun(arg1)* 1/(n-n1+1)
    arg3<-colSums(arg2)* 1/h
    arg3
}

"BoundaryLambdahat"<-
function(xin, xout, h, kfun)
{
    n<- length(xin)
	xin<-sort(xin)
    n1<-1:n
    xout1<-xout[which(abs(xout)<h)]
    xout2<-xout[which(abs(xout)>h)]
    arg1bound<-(sapply(xout1, "-", xin))/h
    arg1notbound<-(sapply(xout2, "-", xin))/h	
    arg2notbound<-kfun(arg1notbound) * 1/(n-n1+1)

    n2<-length(xout1)
    arg2bound<-vector("numeric", length=n2) # initialize the vector to hold the boundary points calculations
	
	for(i in 1:n2)#BoundaryBiweightWJ(arg1bound ,xout1/h)         #this computes the estimate at the boundary points
	{
		arg2bound[i]<-colSums(BoundaryBiweightWJ((xout1[i]-xin)/h ,xout1[i]/h) / (n-n1+1))/h	
	}			       
	   
    arg2notbound<-colSums(arg2notbound)/h   # reassign the result to the same variable in order to save memory. 
    arg3<-c(arg2bound, arg2notbound)
    cbind(c(xout1, xout2), arg3)          #return the result
 }

"BoundaryLambdahatEpan"<-
function(xin, xout, h, kfun)
{
    n<- length(xin)
	xin<-sort(xin)
    n1<-1:n
    xout1<-xout[which(abs(xout)<h)]
    xout2<-xout[which(abs(xout)>h)]
    arg1bound<-(sapply(xout1, "-", xin))/h
    arg1notbound<-(sapply(xout2, "-", xin))/h	
    arg2notbound<-kfun(arg1notbound) * 1/(n-n1+1)

    n2<-length(xout1)
    arg2bound<-vector("numeric", length=n2) # initialize the vector to hold the boundary points calculations
	
	for(i in 1:n2)       #this computes the estimate at the boundary points
	{
		arg2bound[i]<-colSums(BoundaryEpanechnikovWJ((xout1[i]-xin)/h ,xout1[i]/h) / (n-n1+1))/h	
	}			       
	   
    arg2notbound<-colSums(arg2notbound)/h   # reassign the result to the same variable in order to save memory. 
    arg3<-c(arg2bound, arg2notbound)
    cbind(c(xout1, xout2), arg3)          #return the result
 }


"LocLinEst"<-function(xin, xout, kfun, IntKfun)
{
	MinX<-min(xin)
	MaxX<-max(xin)
	N<-length(xin)
	n<-length(xout)
	Delta<-(MaxX-MinX)/n
	PartInterval<-c(MinX, MinX+(1:n)*Delta) #Partion the interval (x[1], x[n]) into subintervals
	FirstBinCenter<-(PartInterval[1]+PartInterval[2])/2
	BinCenters<- c(FirstBinCenter, FirstBinCenter+(1:(n-1))*Delta)
	fi<-sapply(1:n, function(i, xin, PartInterval) 
						length(which(xin > PartInterval[i] & xin <= PartInterval[i+1])), 
						xin, PartInterval) #crete the bin counts
	citmp<-fi/(N-cumsum(fi)+1)
	ci<-citmp/Delta
	#### CALCULATE THE BANDWIDTH FOR THE DERIVATIVE ESTIMATE theta22####
	
	h<- PilotWidth1(xin) # +.5
	usethis<-SecondDerivativeQuarticEstimate(xout, ci)
 
	sn0<-sn.0(BinCenters, BinCenters, h, kfun)
	sn1<-sn.1(BinCenters, BinCenters, h, kfun)
	sn2<-sn.2(BinCenters, BinCenters, h, kfun)
	sn3<-sn.3(BinCenters, BinCenters, h, kfun)
	sn4<-sn.4(BinCenters, BinCenters, h, kfun)
	sn5<-sn.5(BinCenters, BinCenters, h, kfun)
	sn6<-sn.6(BinCenters, BinCenters, h, kfun)
	tn0<-tn.0(BinCenters, BinCenters, h, kfun, ci)
	tn1<-tn.1(BinCenters, BinCenters, h, kfun, ci)
	tn2<-tn.2(BinCenters, BinCenters, h, kfun, ci)
	tn3<-tn.3(BinCenters, BinCenters, h, kfun, ci)
den<-  sn0*sn2*sn4*sn6 - sn0*sn2*(sn5^2) - sn0*(sn3^2) *sn6 + 2*sn0*sn3*sn4*sn5-sn0*(sn4^3)-sn1^2*sn4*sn6 +( sn1^2)*(sn5^2 )+ 2*sn1*sn2*sn3*sn6 
	- 2*sn1*(sn3^2) *sn5 - 2*sn1*sn4*sn2*sn5 + 2*sn1*sn3*(sn4^2) - (sn2^3) *sn6 + 2*(sn2^2)*sn3*sn5 + (sn2^2)*(sn4^2)-3*sn2*sn4*sn3^2 + sn3^4	
	A1<-   sn1*sn3*sn6 -sn1*sn4*sn5 - (sn2^2) *sn6+sn2*sn3*sn5+sn2*sn4^2 -sn4*( sn3^2)
	
	A2<- sn0*sn3*sn6-sn0*sn4*sn5-sn2*sn1*sn6+sn3*sn2*sn4+sn1*sn3*sn5-(sn3^3)
	A3<- sn0*sn2*sn6-sn0*(sn4^2) -(sn1^2) *sn6+2*sn3*sn1*sn4-sn2*sn3^2
	A4<-  sn0*sn2*sn5 - sn0*sn3*sn4 - (sn1^2) *sn5+sn1*sn2*sn4+sn1*(sn3^2)-sn3*(sn2^2)
	est2<- 2* (A1*tn0 - A2*tn1 + A3*tn2 - A4*tn3)/den
#	cat("est2 = ", Delta * sum(est2^2))	
# Compute the bandwidth estimate to use:
	
	theta22<- Delta * sum(est2^2)
	RofK <- 3* sqrt(5)/25 	
	MuofK <- 3* sqrt(5)	/35
	Muse<-1/(1-IntKde(xin, max(xout), PilotWidth1(xin), IntKfun)) -1

	#huse1<-( RofK*Muse /(N * MuofK^2 * theta22))^0.2
	huse<- ( RofK*Muse /(N * MuofK^2 * Delta*sum(usethis^2)))^0.2

	
	sn0<-sn.0(BinCenters, xout, huse, kfun)
	sn1<-sn.1(BinCenters, xout, huse, kfun)
	sn2<-sn.2(BinCenters, xout, huse, kfun)
	tn0<-tn.0(BinCenters, xout, huse, kfun, ci)
	tn1<-tn.1(BinCenters, xout, huse, kfun, ci)
	estuse<-round((tn1*sn1-tn0*sn2)/(sn1^2-sn0*sn2), digits =4)
	exportData(estuse,"ttt.xls")
	#wluse<-round(BoundaryLambdahat(xin, xout, PilotWidth1(xin), Biweight), digits =4)
	plot(xout, estuse,  type="l", xlab="time (in 1000's of hours)", ylab="failure rate", ylim=c(0,1))
	#lines( wluse, lty=4, col=5)
	}

xout<-seq(0, max(suicdata), length=150)
#cat(length(suicdata), "\n", 1+2*2*log10(length(suicdata)), "\n")
LocLinEst(suicdata,xout,Epanechnikov, IntEpanechnikov)